Introduction

I’ve created 10,000 simulations of populations undergoing different levels of sexual reproduction and analyzed the index of association along with genotypic and allelic diversity metrics, including an analysis of the contribution of each locus to the genotypic diversity. This will serve as a report for the simulations.

Setup

library('zksimanalysis')
## Loading required package: tidyverse
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
## Loading required package: adegenet
## Loading required package: ade4
## 
##    /// adegenet 2.1.0 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## Loading required package: poppr
## This is poppr version 2.4.1.99.2. To get started, type package?poppr
## OMP parallel support: available
## 
## This version of poppr is under development.
## If you find any bugs, please report them at https://github.com/grunwaldlab/poppr/issues
## Loading required package: feather
## Loading required package: vcfR
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.5.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
## Loading required package: poweRlaw
## Loading required package: flux
## Loading required package: caTools
## This is flux 0.3-0
## Loading required package: viridis
## Loading required package: viridisLite

This here will load in all of the results of the analyses. These were previously processed in the file ssr_data_cleaning.Rmd. The name of the data was called “vals”.

system.time({
res           <- load(here::here("data", "ma_processed_results.rda"))
even_mutation <- vals %>% filter(mutation_rate == "even")
odd_mutation  <- vals %>% filter(mutation_rate == "uneven")
})
##    user  system elapsed 
##   4.867   0.101   4.973
res
## [1] "vals"
even_mutation
## # A tibble: 39,996 x 58
##           Ia  p.Ia     rbarD  p.rD       Iacc p.Iacc     rbarDcc p.rDcc
##        <dbl> <dbl>     <dbl> <dbl>      <dbl>  <dbl>       <dbl>  <dbl>
##  1 15.287206 0.001 0.8163678 0.001 10.5654835  0.001  0.56319394  0.001
##  2 13.898508 0.001 0.7344525 0.001 -0.4002157  0.912 -0.02200733  0.910
##  3 13.808291 0.001 0.7295229 0.001  8.1645478  0.001  0.43319642  0.001
##  4 13.492567 0.001 0.7120693 0.001  9.3715550  0.001  0.49562349  0.001
##  5 17.472262 0.001 0.9896896 0.001 16.9152542  0.001  0.96893204  0.001
##  6 17.391951 0.001 0.9739460 0.001 16.6426643  0.001  0.93234202  0.001
##  7 17.449077 0.001 0.9876070 0.001 17.0898597  0.001  0.96614162  0.001
##  8 17.026281 0.001 0.9504304 0.001 14.4834730  0.001  0.78440171  0.001
##  9  2.687697 0.001 0.4526816 0.001  1.9636364  0.007  0.32747142  0.001
## 10  2.719831 0.001 0.4005348 0.001  1.9230769  0.001  0.28313216  0.001
## # ... with 39,986 more rows, and 50 more variables: pop <chr>, run <chr>,
## #   seed <chr>, sexrate <chr>, gen <chr>, rep <chr>, sample <fctr>,
## #   mutation_rate <chr>, H <dbl>, G <dbl>, lambda <dbl>, E.5 <dbl>,
## #   B <dbl>, uSimp <dbl>, NMLG <dbl>, H.var <dbl>, G.var <dbl>,
## #   lambda.var <dbl>, E.5.var <dbl>, B.var <dbl>, uSimp.var <dbl>,
## #   NMLG.var <dbl>, allele <dbl>, `1-D` <dbl>, Hexp <dbl>, Evenness <dbl>,
## #   tab <list>, allelecc <dbl>, `1-Dcc` <dbl>, Hexpcc <dbl>,
## #   Evennesscc <dbl>, tabcc <list>, H.locus <dbl>, G.locus <dbl>,
## #   lambda.locus <dbl>, E.5.locus <dbl>, uSimp.locus <dbl>, CF <dbl>,
## #   source <chr>, mean.rd <dbl>, sd.rd <dbl>, mad.rd <dbl>, min.rd <dbl>,
## #   max.rd <dbl>, mean.rdcc <dbl>, sd.rdcc <dbl>, mad.rdcc <dbl>,
## #   min.rdcc <dbl>, max.rdcc <dbl>, significance <fctr>
odd_mutation 
## # A tibble: 39,996 x 58
##           Ia  p.Ia     rbarD  p.rD      Iacc p.Iacc   rbarDcc p.rDcc
##        <dbl> <dbl>     <dbl> <dbl>     <dbl>  <dbl>     <dbl>  <dbl>
##  1 15.461693 0.001 0.7877455 0.001 14.106736  0.001 0.7167393  0.001
##  2 14.052082 0.001 0.7073192 0.001 12.350456  0.001 0.6226454  0.001
##  3 14.068551 0.001 0.7077485 0.001 13.246838  0.001 0.6685806  0.001
##  4 13.625753 0.001 0.6849662 0.001 12.401617  0.001 0.6242132  0.001
##  5 17.691915 0.001 0.9512911 0.001 17.320580  0.001 0.9388541  0.001
##  6 17.432074 0.001 0.9279263 0.001 17.178575  0.001 0.9170414  0.001
##  7 17.419960 0.001 0.9369048 0.001 16.995387  0.001 0.9199196  0.001
##  8 17.007769 0.001 0.9029985 0.001 15.596913  0.001 0.8152735  0.001
##  9  2.385947 0.001 0.3453806 0.001  2.086592  0.001 0.2992053  0.001
## 10  2.786404 0.001 0.3593397 0.001  2.658896  0.001 0.3448530  0.001
## # ... with 39,986 more rows, and 50 more variables: pop <chr>, run <chr>,
## #   seed <chr>, sexrate <chr>, gen <chr>, rep <chr>, sample <fctr>,
## #   mutation_rate <chr>, H <dbl>, G <dbl>, lambda <dbl>, E.5 <dbl>,
## #   B <dbl>, uSimp <dbl>, NMLG <dbl>, H.var <dbl>, G.var <dbl>,
## #   lambda.var <dbl>, E.5.var <dbl>, B.var <dbl>, uSimp.var <dbl>,
## #   NMLG.var <dbl>, allele <dbl>, `1-D` <dbl>, Hexp <dbl>, Evenness <dbl>,
## #   tab <list>, allelecc <dbl>, `1-Dcc` <dbl>, Hexpcc <dbl>,
## #   Evennesscc <dbl>, tabcc <list>, H.locus <dbl>, G.locus <dbl>,
## #   lambda.locus <dbl>, E.5.locus <dbl>, uSimp.locus <dbl>, CF <dbl>,
## #   source <chr>, mean.rd <dbl>, sd.rd <dbl>, mad.rd <dbl>, min.rd <dbl>,
## #   max.rd <dbl>, mean.rdcc <dbl>, sd.rdcc <dbl>, mad.rdcc <dbl>,
## #   min.rdcc <dbl>, max.rdcc <dbl>, significance <fctr>

All data coming in must be verified that we indeed have all data. Since we had a factorial design, we can create what we expect the populations (records of the simulations) to look like.

nrow(even_mutation) == 40000
## [1] FALSE
nrow(even_mutation)
## [1] 39996
nrow(odd_mutation) == 40000
## [1] FALSE
nrow(odd_mutation)
## [1] 39996

The numbers are off by a whopping 4 simulations. Overall, I’m okay with this.

Visualizations

Setup

The visualizations are summaries of the data and can help to understand the how the data is distributed to see any patterns.

This part is setting up some helpful variables and names that we will use over and over again.

old_theme <- theme_set(theme_bw(base_size = 16, base_family = "Helvetica"))
old_theme <- theme_update(axis.text.x = element_text(angle = 90, vjust = 0.5))
my_theme  <- theme(panel.border = element_blank()) +
  theme(panel.grid.major.x = element_blank()) +
  theme(panel.grid.major.y = element_line(color = "grey20")) +
  theme(panel.grid.minor.y = element_line(color = "grey50", linetype = 3))
asp       <- function(r) theme(aspect.ratio = r)
sample_colors <- scale_color_viridis(end = 0.75, discrete = TRUE, direction = -1, option = "A")
sample_fill <- scale_fill_viridis(end = 0.75, discrete = TRUE, direction = -1, option = "A")
# The p-value for rd is best visualized on a log scale. This provides that
# the color scaling for that transformation, emphasizing smaller p-values over
# larger ones.
log_rd_color <- function(breaks = c(0.005, 0.01, 0.025, 0.05, 0.1)){
  scale_color_viridis(option = "viridis", 
                      breaks = log(breaks), 
                      labels = breaks)
}
  
# Various labels with mathematical expressions are used for the plots. 
# Pre-defining them here allows us to use eval(exp) instead of re-typing the
# entire expression every time.
rd    <- quote(expression(bar(r)[d]))
rdcc  <- quote(expression(paste(bar(r)[d], " (clone-corrected)")))
prd   <- quote(expression(paste("p-value (", bar(r)[d], ")")))
prdcc <- quote(expression(paste("p-value (", bar(r)[d], " (clone-corrected))")))
E5g   <- quote(expression(paste("Genotypic evenness (", E[5][G], ")")))
E5a   <- quote(expression(paste("Mean allelic evenness (", E[5][A], ")")))
Hexp  <- quote(expression(paste("Nei's gene diversity (", H[exp], ")")))
uSimp <- "Unbiased simpson's index (genotypic diversity)"


fancy_clone_correct <- . %>% 
  mutate(clone_correct = factor(clone_correct, labels = c("uncorrected", "clone-corrected")))
fancy_sample <- . %>%  
  mutate(sample = forcats::lvls_revalue(sample, paste("n =", sort(unique(sample)))))
fancy_mutation_rate <- . %>% 
  mutate(mutation_rate = factor(mutation_rate, labels = c("Even Mutation Rate", "Uneven Mutation Rate")))
rd_sexrate <- vals %>% 
  select(sexrate, rbarD, rbarDcc, sample, mutation_rate) %>%
  gather("clone_correct", "rd", rbarD, rbarDcc) %>% 
  fancy_clone_correct %>%
  fancy_sample %>%
  fancy_mutation_rate %>%
  ggplot(aes(x = sexrate, y = rd, fill = clone_correct)) +
  # geom_boxplot(outlier.size = 0.25) +
  geom_violin(scale = "width", draw_quantiles = c(0.25, 0.5, 0.75)) +
  facet_grid(sample~mutation_rate, switch = "y") +
  scale_y_continuous(position = "right") +
  theme(text = element_text(size = 16)) +
  my_theme +
  theme(strip.text = element_text(size = 16)) +
  labs(list(
    x = "Rate of Sexual Reproduction",
    y = eval(rd),
    fill = "Clone Correction"
    # title = "Standardized Index of Association vs Sex",
    # subtitle = "for even and uneven mutation rates"
    )
    ) +
  # theme_minimal() + 
  # theme(strip.text = element_blank()) +
  # theme(text = element_blank()) +
  # theme(title = element_blank()) +
  # theme(aspect.ratio = 1/2) +
  theme(strip.background = element_blank()) +
  theme(legend.position = "bottom") +
  scale_fill_grey(start = 1, end = 0.5)
  

# ggsave(file = here::here('manuscript', 'figure', 'rd_sexrate.pdf'),
#        dpi = 600, width = 6, height = 7)
rd_sexrate
## Warning: Removed 105 rows containing non-finite values (stat_ydensity).

rd_pval <- vals %>% 
  select(sexrate, p.rD, p.rDcc, sample, mutation_rate) %>%
  gather("clone_correct", "prd", p.rD, p.rDcc) %>% 
  fancy_clone_correct %>%
  fancy_sample %>%
  fancy_mutation_rate %>%
  ggplot(aes(x = sexrate, y = prd, fill = sample)) +
  geom_boxplot(outlier.size = 0.25) +
  scale_y_log10(breaks = c(0.01, 0.1, 1),
                minor_breaks = c((1:9)*0.001, (2:9)*0.01, (2:9)*0.1)) +
  facet_grid(mutation_rate~clone_correct) +
  my_theme +
  theme(panel.grid.minor.y = element_line(color = "grey60", linetype = 1)) +
  labs(list(
    x = "Rate of Sexual Reproduction",
    y = eval(prd),
    title = "Detection of clonal reproduction",
    subtitle = "for even and uneven mutation rates")
    ) +
  sample_fill
rd_pval
## Warning: Removed 105 rows containing non-finite values (stat_boxplot).

rd_pval_cc <- vals %>% 
  select(p.rDcc, p.rD, mutation_rate, sample, sexrate, NMLG) %>%
  fancy_sample %>%
  ggplot(aes(x = p.rDcc, y = p.rD, pch = mutation_rate)) +
  geom_point(aes(color = log(as.numeric(substr(sample, 5, 10)) - NMLG + 1)), alpha = 0.25) +
  geom_abline(slope = 1) +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_viridis(breaks = log(c(6, 11, 26, 76, 101)),
                      labels = c(5, 10, 25, 75, 100),
                      option = "A",
                      direction = -1) +
  facet_grid(sexrate ~ sample) +
  ylab(eval(prd)) +
  xlab(eval(prdcc)) +
  labs(list(
    pch = "Mutation Rate",
    color = "N - MLG"
    )) +
  guides(pch = guide_legend(override.aes = list(alpha = 1))) +
  theme(legend.position = "bottom") +
  theme(legend.box = "vertical") +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16, face = "bold")) +
  theme(strip.background = element_blank()) +
  asp(1)
rd_cc <- vals %>% 
  select(rbarD, rbarDcc, mutation_rate, sample, sexrate, NMLG) %>%
  fancy_sample %>%
  ggplot(aes(x = rbarDcc, y = rbarD, pch = mutation_rate)) +
  geom_point(aes(color = log(as.numeric(substr(sample, 5, 10)) - NMLG + 1)), alpha = 0.25) +
  geom_abline(slope = 1) +
  # scale_x_log10() +
  # scale_y_log10() +
  scale_color_viridis(breaks = log(c(6, 11, 26, 76, 101)),
                      labels = c(5, 10, 25, 75, 100)) +
  facet_grid(sexrate ~ sample) +
  ylab(eval(rd)) +
  xlab(eval(rdcc)) +
  labs(list(
    pch = "Mutation Rate",
    color = "N - MLG"
    )) +
  guides(pch = guide_legend(override.aes = list(alpha = 1))) +
  theme(legend.position = "bottom") +
  theme(legend.box = "vertical") +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16, face = "bold")) +
  theme(strip.background = element_blank()) +
  asp(1)
rd_pval_cc
## Warning: Removed 74 rows containing missing values (geom_point).

rd_cc
## Warning: Removed 74 rows containing missing values (geom_point).

vals %>%
  fancy_sample %>%
  fancy_mutation_rate %>%
  ggplot(aes(x = sexrate, y = rbarD, color = log(p.rD))) +
  geom_jitter(alpha = 0.25, height = 0) +
  geom_boxplot(color = "black", notch = TRUE, width = 0.5, alpha = 0.25) +
  log_rd_color() + 
  my_theme + 
  facet_grid(sample~mutation_rate) +
  labs(list(
    x = "Rate of Sexual Reproduction",
    y = eval(rd),
    color = eval(prd),
    title = "Detection of clonal reproduction",
    subtitle = "for even and uneven mutation rates")
    )
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing missing values (geom_point).

ggplot(even_mutation, aes(x = rbarD, y = rbarDcc, color = significance)) +
  geom_point(alpha = 0.25) +
  geom_abline(slope = 1) +
  facet_grid(sample~sexrate) +
  scale_x_continuous(breaks = c(0, 0.5, 1)) +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(alpha = 1))) +
  asp(1) +
  labs(list(
    x = eval(rd),
    y = eval(rdcc),
    title = "Differences in probability due to clone correction", 
    subtitle = "20 loci; even mutation rate")
  ) 
## Warning: Removed 68 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = rbarD, y = rbarDcc, color = significance)) +
  geom_point(alpha = 0.25) +
  geom_abline(slope = 1) +
  facet_grid(sample~sexrate) +
  scale_x_continuous(breaks = c(0, 0.5, 1)) +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(alpha = 1))) +
  asp(1) +
  labs(list(
    x = eval(rd),
    y = eval(rdcc),
    title = "Differences in probability due to clone correction", 
    subtitle = "21 loci; uneven mutation rate")
  ) 
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(vals, aes(x = Hexp, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  facet_wrap(~mutation_rate) +
  labs(list(
    title = "Index of association vs. Gene Diversity",
    subtitle = "for even and uneven mutation rates",
    y = eval(rd),
    x = eval(Hexp),
    color = eval(prd))
    )
## Warning: Removed 31 rows containing missing values (geom_point).

ggplot(even_mutation, aes(x = Hexp, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() + 
  asp(1) +
  facet_grid(sample~sexrate) +
  labs(list(
    title = "Index of association vs. Gene Diversity",
    subtitle = "by sex rate and sample size for 20 loci; even mutation rate",
    y = eval(rd),
    x = eval(Hexp),
    color = eval(prd))
    )
## Warning: Removed 25 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = Hexp, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() + 
  asp(1) +
  facet_grid(sample~sexrate) +
  labs(list(
    title = "Index of association vs. Gene Diversity",
    subtitle = "by sex rate and sample size for 21 loci; uneven mutation rate",
    y = eval(rd),
    x = eval(Hexp),
    color = eval(prd))
    )
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(even_mutation, aes(x = Evenness, y = E.5, color = rbarD)) +
  geom_point(alpha = 0.25) +
  scale_color_viridis() +
  geom_abline(slope = 1) +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(eval(E5a)) +
  ylab(eval(E5g))
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = Evenness, y = E.5, color = rbarD)) +
  geom_point(alpha = 0.25) +
  scale_color_viridis() +
  geom_abline(slope = 1) +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(eval(E5a)) +
  ylab(eval(E5g))

ggplot(even_mutation, aes(x = NMLG, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  scale_x_continuous(breaks = c(5, 10, 25, 50, 100)) +
  asp(0.75) +
  facet_grid(sexrate~sample, scale = "free_x") +
  xlab("Number of multilocus genotypes") +
  ylab(eval(rd))
## Warning: Removed 25 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = NMLG, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  scale_x_continuous(breaks = c(5, 10, 25, 50, 100)) +
  asp(0.75) +
  facet_grid(sexrate~sample, scale = "free_x") +
  xlab("Number of multilocus genotypes") +
  ylab(eval(rd))
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(even_mutation, aes(x = uSimp, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(uSimp) +
  ylab(eval(rd))
## Warning: Removed 25 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = uSimp, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(uSimp) +
  ylab(eval(rd))
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(even_mutation, aes(x = E.5, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(eval(E5g)) +
  ylab(eval(rd))
## Warning: Removed 25 rows containing missing values (geom_point).

ggplot(odd_mutation, aes(x = E.5, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color() +
  asp(0.75) +
  facet_grid(sample~sexrate) +
  xlab(eval(E5g)) +
  ylab(eval(rd))
## Warning: Removed 6 rows containing missing values (geom_point).

ggplot(filter(even_mutation, p.rD >= 0.05), aes(x = Evennesscc, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color(c(0.05, 0.1, 0.25, 0.5, 1)) + 
  facet_grid(sexrate~sample) +
  theme_dark() +
  geom_vline(xintercept = max((even_mutation %>% filter(sexrate == "1.0000"))$Evennesscc), lty = 2) +
  asp(0.5) + 
  ggtitle(expression(paste(bar(r)[d], " vs. mean allele evenness")), 
          subtitle = "For p >= 0.05 with even mutation rates") +
  ylab(expression(bar(r)[d])) +
  xlab(expression(paste(E[5], " (averaged over 20 loci)")))

ggplot(filter(odd_mutation, p.rD >= 0.05), aes(x = Evennesscc, y = rbarD, color = log(p.rD))) +
  geom_point(alpha = 0.25) +
  log_rd_color(c(0.05, 0.1, 0.25, 0.5, 1)) + 
  facet_grid(sexrate~sample) +
  theme_dark() +
  geom_vline(xintercept = max((odd_mutation %>% filter(sexrate == "1.0000"))$Evennesscc), lty = 2) +
  asp(0.5) + 
  ggtitle(expression(paste(bar(r)[d], " vs. mean allele evenness")), 
          subtitle = "For p >= 0.05 with uneven mutation rates") +
  ylab(expression(bar(r)[d])) +
  xlab(expression(paste(E[5], " (averaged over 21 loci)")))

diversity_plot <- vals %>%
  fancy_mutation_rate %>%
  mutate(mutation_rate = paste0("plain(\"", mutation_rate, "\")")) %>%
  rename(`E[5]` = E.5,
       `E[5][A]` = Evennesscc,
       `H[exp]` = Hexpcc) %>%
  tidyr::gather("index", "value",  `E[5]`, H, G, `E[5][A]`, `H[exp]`) %>%
  mutate(index = forcats::fct_relevel(index, c("H", "E[5]", "G", "E[5][A]", "H[exp]"))) %>%
  ggplot(aes(x = sexrate, y = value, color = sample)) +
  stat_summary(fun.data = mean_sdl, geom = "pointrange",
               position = position_dodge(width = 0.75), alpha = 0.75) +
  facet_grid(index ~ mutation_rate, scales = "free_y", 
             labeller = label_parsed, switch = "y") +
  scale_y_continuous(position = "right") +
  theme(aspect.ratio = 0.5) +
  theme(plot.margin = unit(rep(0, 4), "lines")) +
  theme(text = element_text(size = 16)) +
  theme(legend.position = "bottom") +
  theme(strip.text = element_text(size = 16)) +
  theme(strip.background = element_blank()) +
  my_theme +
  labs(list(
    x = "Rate of Sexual Reproduction",
    color = "Sample Size"
  )) +
  sample_colors
# ggsave(file = here::here('manuscript', 'figure', 'diversity_stats.pdf'),
#        plot = diversity_plot,
#        dpi = 600, width = 6, height = 8)
diversity_plot
## Warning: Removed 6 rows containing non-finite values (stat_summary).

ggplot(vals, aes(x = CF, y = B, color = sample)) +
  geom_point() +
  facet_grid(sample~mutation_rate)
## Warning: Removed 24638 rows containing missing values (geom_point).